set more off
capture log close

*------------------------------------------------------------------------------
* Creates analysis dataset for first stage
* Pollution Monitors
* Author: David Simon
*
* Work draws from Claudia's wind/school file and Mike's distance file.
* This is a revision of the first stage (the original note here was left
* unfinished).
*------------------------------------------------------------------------------

*------------------------------- Globals --------------------------------------
	* Set your global here: home is the only machine-specific path, every
	* other global below is built from it.
	global home "C:\Users\Claudia Persico\Dropbox\Research on Florida Wind Patters"

	* Folder that receives the log file opened below
	global output "$home/dofiles/makedata"
	* Root of the road setup samples; the remake subfolder under it is read
	* from and written to later in this file
	global roadsetup "$home/roadsetup/samples/"
	* Schools downwind samples folder (defined here, not referenced again in
	* the code visible in this file)
	global samples "$home/School wind pollution/schools downwind"

* Open the log under capture, so a failure to open it does not stop the run
capture log using "$output/mk schools downwind loop.log", replace

********************************************************************************
***Now, do 5 closest stuff:
* For each year 2003-2012 this loop
*   1. loads that year's wind file and merges the school-to-major-roads
*      (5closestA) file onto it by ncessch,
*   2. builds, for each of the 5 matched roads, a continuous downwind intensity
*      (windtreat) and binary downwind indicators by distance threshold,
*   3. summarises those indicators across the 5 roads (rdcnt_win, dwind_cnt_win,
*      dwind_any_win),
*   4. collapses to means by uniqueID and ROUTE and saves one file per year.

display "5 closest"
forval yy = 2003/2012 {

*****************merge on closest road**********;
	use "$roadsetup/remake/schwind5mi_schroad1mi_mjrrds_5nearestA_`yy'.dta", clear
	merge m:1 ncessch using "$roadsetup/remake/schooltomajorrds_mjrrds_5closestA.dta"
	tab _merge

*note, the using data has observations we dropped either from being more
*than half a mile from a highway or  observations we couldn't fill in wind data for

* keep master-only and matched rows; discard rows found only in the using file
	drop if _merge==2
	drop _merge

* Blank out the road angle for any matched road whose ROUTE string contains
* neither I nor US, so that road yields missing treatment values below.
* NOTE(review): strpos matches the letter I anywhere in the route name, not
* only as a prefix - confirm this is intended for the ROUTE naming in the data.
	forv i=1/5 {
		gen keep=1 if strpos(ROUTE`i', "I")
		replace keep=1 if strpos(ROUTE`i', "US")
		replace angle_degrees`i'=. if keep !=1
		drop keep
		}

***JAH 9/18/17: Replacing windvalue as missing if=0
* windvalue1 is the raw wind value, windvalueimp the 1-day-out imputed value;
* the other imputed and circular-mean variables are not used and are dropped.
rename windvalue windvalue1
rename im_windvalue_1dayout windvalueimp	
drop im_circmean_1dayout im_circmean_3dayout im_windvalue_3dayout
	replace windvalue1=. if windvalue1==0


* extract date from date time
	gen date=dofc(datetime)

************continuous treatment from the imputed windvalue (circular distance)*****************	
* DES 7/20/17: Make a continuous measure of treatment based on windvalue
* JAH 5/23/18: Do it for each of the 5 matches roads
* Modify code to do this for each year
forv i=1/5 {
	* absolute difference between road angle and imputed wind value
	gen windtreat`i'= abs(angle_degrees`i'-windvalueimp)   /*8/7/17: updated windvalue1 to windvalueimp */
	label var windtreat`i' "Downwind intensity, windvalue, site `i'"
	* fold differences above 180 back onto 0-180 (shortest way round the circle);
	* a missing difference stays missing because 360 minus missing is missing
	replace windtreat`i' = 360 - windtreat`i' if windtreat`i'>180 
	sum windtreat`i' angle_degrees`i' windvalue1

* order for ease in spot check
	order school year windvalue1 windvalueimp angle_degrees`i' windtreat`i'
* now normalize so that it is between 0 and 1 with 1 treated and 0 not
	sum windtreat`i'
	replace windtreat`i' = 1-(windtreat`i'/180)
	* no treatment value when road i is beyond distance 1 (or distance is missing)
	replace windtreat`i'=. if mi_to_nid`i'>1

	}
	
	sum windtreat*
	
*********additional data clean up: making month, hour, site-month-hour variable*****************
* None of the variables made in this section are named in the collapse further
* down, so they do not reach the saved per-year file.
	egen school_closest = group(ncessch)
	drop month hour
	gen month=month(date)
	gen hour=hh(datetime)
	* Stored as long rather than the default float: float holds integers
	* exactly only up to 16,777,216, so with 1678 or more schools the composite
	* id would round and distinct site-month-hour cells would share a value.
	gen long sitemonthhour=school_closest*10000+month*100+hour
	gen long sitemonth = school_closest*100+month
	order date datetime month hour

************create down wind variables********************************	
forv i = 1/5 {	
	* 0 by default wherever both the wind value and the road angle are known
	gen downwind`i'=0 if windvalueimp!=. & angle_degrees`i'!=.
	label var downwind`i' "Downwind of highway, site `i'"
	* 1 when the two angles are within 45 degrees of each other ...
	replace downwind`i'=1 if abs(angle_degrees`i'-windvalueimp)<45   
	* ... including the two cases where the pair straddles the 0/360 boundary
	replace downwind`i'=1 if abs(angle_degrees`i'-360-windvalueimp)<45&angle_degrees`i'>315&windvalueimp<45
	replace downwind`i'=1 if abs(angle_degrees`i'+360-windvalueimp)<45&angle_degrees`i'<45&windvalueimp>315   	

*now create different values of downwind based on distance
* j is the distance threshold in tenths of a mile
	foreach j in 1 4 5 10  {
	gen downwind`i'_`j'=0 if windvalueimp~=. & angle_degrees`i'~=.
	label var downwind`i'_`j' "Downwind and within `j' 10ths of mi of highway"
	replace downwind`i'_`j'=1 if downwind`i'==1 & mi_to_nid`i'<=`j'/10

*code monitors outside of distance threshold as missing
* (a missing distance also satisfies the greater-than test, so it becomes missing too)
	replace downwind`i'_`j'=. if mi_to_nid`i'>`j'/10
	}
	}
************create up wind variables********************************	
	* DES 6/11/18: dropped upwind vars to compress data set, see older code for how we did this

	* 1 when hour is 7 through 16, else 0; not named in the collapse below
	gen schoolday = (hour>=7 & hour<=16)

***grouping data by distance bins: (JAH 5/30/18: Adding "|windvalueimp==." to make them missing for the collapse, if we don't have treatment data)
* Every summary below is set to missing when road 1 is beyond the threshold
* (or its distance is missing) or when the imputed wind value is missing.
	foreach j in 4 5 10 {
	*roadcount: number of the 5 roads with a non-missing downwind indicator at this threshold
		egen rdcnt_win`j'=rownonmiss(downwind*_`j' )
		replace rdcnt_win`j'=. if mi_to_nid1>`j'/10|windvalueimp==.

	*number of roads the school is downwind of in the hour (rowtotal counts missing as 0)
		egen dwind_cnt_win`j'=rowtotal(downwind*_`j' )
		replace dwind_cnt_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.

	*downwind of any road in the hour?
	* rowmax is missing when all 5 indicators are missing; the count is 0 in
	* that case, so the next line turns it into 0 before the missing rule applies
		egen dwind_any_win`j'=rowmax(downwind*_`j' )			
		replace dwind_any_win`j' =0 if dwind_cnt_win`j'==0
		replace dwind_any_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.
	}

sum dwind_any_win*


* Collapse to the mean of each listed variable within uniqueID and ROUTE groups;
* all variables not listed here or in by() are discarded.
collapse mi_to_nid* AADT* rdcnt_win* dwind_any_win* downwind1_* ncessch year windtreat*, by(uniqueID ROUTE*) fast
rename uniqueID school

* tag the road-specific traffic and distance variables as major-highway measures
forv i=1/5 {
	rename AADT`i' AADT`i'_mjrhwy
	rename mi_to_nid`i' mi_to_nid`i'_mjrhwy
	}

display "made it here" // checking the loop

save "$roadsetup/remake/collapsed_5closest_mjrrds_`yy'.dta", replace	
}

***********************************************************************************

*now stack (append) the yearly collapsed files, 2003 through 2012, into one file

* The use command accepts clear, not replace; with replace Stata stops here
* with an option-not-allowed error and the all-years file is never written.
use "$roadsetup/remake/collapsed_5closest_mjrrds_2003.dta", clear

forvalues yy = 2004(1)2012 {
	append using "$roadsetup/remake/collapsed_5closest_mjrrds_`yy'.dta"
}
	
save "$roadsetup/remake/collapsed_allyrs.dta", replace

* The log was opened under capture, so it may not exist; close it under
* capture as well so a missing log does not end the run with an error.
cap log close
